*Project folders: data read by this do-file
global input "Q:\dc1prhcmsas01\PU2\data - sas"
global temp "Q:\dc1prhcmsas01\PU2\temp_stata_wr"

*Project folders: data, figures and tables written by the Stata code
global output "Q:\dc1prhcmsas01\PU2\data_stata_wr"
global graphs "Q:\dc1prhcmsas01\PU2\graphs_stata_wr"
global tables "Q:\dc1prhcmsas01\PU2\tables_stata_wr"

*Folder for log files
global log "Q:\dc1prhcmsas01\PU2\Log - Stata WR"

*Working directory and graph appearance
cd "Q:\dc1prhcmsas01\PU2\Code - Stata WR"
set scheme plotplain
graph set window fontface "Times New Roman"

*Close any log left open by an earlier run, then start this file's log
capture log close
log using "$log/10_reg_hires_rd", replace


*********************************************************
*Clean Data
*********************************************************
*For each new hire, identify last observed job
*  A hire event is identified by emp_pur_c client0 state0 month_hired,
*  which is the key used for the 1:1 merge further down.
	use "$output\new_hires_panel_wide", clear
	
	*Keep only if it's a previous job
	*  i.e. observed in or before the hiring month, at a client other than the hiring client
		gen months_diff=mofd(yr_month) - mofd(month_hired)
		gen previous_job=months_diff<=0 & client_id!=client0
		keep if previous_job==1
			
	*Last observed job
	*  The maximum is taken within hire event rather than within employee, so that an
	*  employee with several hire events keeps a last previous job for each of them
	*  (grouping by emp_pur_c alone kept rows for only one of the events).
		egen months_last_job=max(months_diff), by(emp_pur_c client0 state0 month_hired)
		keep if months_last_job==months_diff 
		
		*Keep highest salary if worked two jobs at once previously
		*  Ties are broken first on base, then on (rounded) gross; any rows still
		*  duplicated on the hire-event key are dropped arbitrarily by force.
		egen base_max=max(base), by(emp_pur_c client0 state0 month_hired)
		keep if base_max==base
		egen gross_max=max(gross), by(emp_pur_c client0 state0 month_hired)
		keep if round(gross_max)==round(gross)
		duplicates drop emp_pur_c client0 state0 month_hired, force
		count 
		
	*Change in base pay from last job 
	*  diff_base is relative to the previous job's base pay
		gen diff_base=(base0-base)/base
		gen log_base=log(base)
		
	*Drop outliers who get crazy changes in base pay 
	*  The filter is on the level of previous base pay (100 < base <= 10000)
		keep if base>100 & base<=10000
		drop base_max gross_max months_last_job previous_job
	
	*Probability of observing a previous job 
	*  observe=1 only for hires matched in both files; hires present only in
	*  hires_temp_wide (no previous job kept above) get observe=0
		merge 1:1 emp_pur_c client0 state0 month_hired using "$temp/hires_temp_wide"
		gen observe=_merge==3 
		drop _merge
		
	*Drop if only temporary layoff 
	*  Keeps the earliest hire of each employee at each client
	*  (the tag variable is generated but not used below in this file)
		duplicates tag emp_pur_c client0, gen(tag)
		egen min_hired=min(month_hired), by(emp_pur_c client0)
		keep if min_hired==month_hired
		
		
*Regression variables
	*Base pay at hire in bins of width 10, aligned so that one bin starts at 913
	gen bin=913+10*floor((base0-913)/10)
	*Treated range of base pay at hire: 913 to 953 inclusive
	gen treat=inrange(base0,913,953)
	*Indicator for bins at or above 913
	gen above=!(bin<913)


	*Hiring month relative to May 2016, grouped into 3-month bins
	gen temp_months = mofd(month_hired)-ym(2016,5)
	gen months_bin=3*floor(temp_months/3)
	
	

*********************************************************
*Fig C1
*********************************************************
	*Program to save estimates 
	*  Syntax: reg_est var name
	*    var  - outcome variable
	*    name - stem of the output file, saved in the output folder as <name>_estimates.dta
	*  Arguments after the second are not read.
	*
	*  Regresses var on a full set of bin indicators without a constant, with
	*  standard errors clustered on client0, and writes one row per bin:
	*    bin_reg    bin value
	*    beta       coefficient on that bin's indicator
	*    ub, lb     beta plus/minus 1.96 standard errors
	*    n          e(N) of the bin regression
	*    predicted  linear fit of var on base0 (600<=base0<=1200) evaluated at bin_reg
	*
	*  The data in memory are replaced by the estimates, so callers should wrap
	*  the call in preserve/restore. The data must have at least as many
	*  observations as there are bins, since results are stored row by row.
	cap program drop reg_est
	program define reg_est 
	args var name 
	
		*ibn. gives every bin its own coefficient; with i.bin the lowest bin is
		*treated as the base level and gets no estimate even under noconstant
		reg `var' ibn.bin, noconstant vce(cluster client0)
	
		*Start from clean result variables (drop one at a time so that a
		*missing variable does not stop the others from being dropped)
		foreach v in beta ub lb bin_reg n predicted {
			cap drop `v'
			gen `v'=.
		}
		
		*Point estimates
		*  Coefficients are looked up by bin value rather than by column position,
		*  so each estimate is always stored next to its own bin. Levels are taken
		*  from the estimation sample so that every level has a coefficient.
		local row=1
		levelsof bin if e(sample), local(levels)
		foreach l of local levels {
			replace bin_reg=`l' if _n==`row'
			replace n=`e(N)' if _n==`row'
			replace beta=_b[`l'.bin] if _n==`row'
			replace ub=_b[`l'.bin] + 1.96*_se[`l'.bin] if _n==`row'
			replace lb=_b[`l'.bin] - 1.96*_se[`l'.bin] if _n==`row'	
			local ++row
		}
		
		*Fitted values
		reghdfe `var' base0 if base0>=600 & base0<=1200, vce(cluster client0)
		replace predicted=_b[base0]*bin_reg+_b[_cons]
		
	*Keep only the rows that hold estimates
	keep if beta!=.
	keep bin_reg beta ub lb n predicted
	
	*Save estimates
	save "$output/`name'_estimates.dta", replace
	
	end 
	
	*Fig C1: bin-level estimates by hiring period
	*  pre  (Fig C1a) = hired on or before 1 April 2016
	*  post (Fig C1b) = hired on or after 1 May 2016
	*  All pre-period outcomes are run first, then all post-period outcomes.
	foreach period in pre post {
		foreach var in base log_base diff_base months_diff observe {
			preserve 
			if "`period'"=="pre" {
				keep if month_hired<= mdy(04,1,2016)
			}
			else {
				keep if month_hired>= mdy(05,1,2016) 
			}
			*Restrict to bins from 513 to 1313
			keep if inrange(bin,513,1313)
			reg_est `var' figc1_raw_`var'_rd_`period'
			restore 
		}
	}
	
	

*********************************************************
*Fig C2
*********************************************************
	*Helper for reg_loop: copy the latest estimation results into one row
	*  row   - observation number that receives the results
	*  label - text stored in spec
	*  time  - value stored in event_time
	*  coef  - name of the coefficient whose estimate and bounds are stored
	cap program drop reg_loop_store
	program define reg_loop_store
	args row label time coef
	
		replace spec="`label'" if _n==`row'
		replace event_time=`time' if _n==`row'
		replace n=`e(N)' if _n==`row'
		replace beta=_b[`coef'] if _n==`row'
		replace ub=_b[`coef'] + 1.96*_se[`coef'] if _n==`row'
		replace lb=_b[`coef'] - 1.96*_se[`coef'] if _n==`row'	
		
	end
	
	*Program to save estimates by hiring period
	*  Syntax: reg_loop var name
	*    var  - outcome variable
	*    name - stem of the output file, saved in the output folder as <name>_estimates.dta
	*  Runs one regression per value of months_bin plus five further
	*  specifications, stores each result in its own row (spec, event_time,
	*  beta, ub, lb, n) and saves those rows. Data in memory are restored on exit.
	cap program drop reg_loop
	program define reg_loop 
	args var name 
	
		preserve
		
		*Result variables
		cap drop beta ub lb n
		foreach v in beta ub lb event_time n {
			gen `v'=.
		}
		gen spec=""
		
		local row=1
		
		*Regression for graphs: one slope, separately for each period
		levelsof months_bin, local(periods)
		foreach p of local periods {
			reghdfe `var' treat base0 if base0>=600 & base0<=1200 & months_bin==`p', vce(cluster client0)
			reg_loop_store `row' "One-Slope" `p' treat
			local ++row
		}		
		
		*Table estimates: Two slopes
		reghdfe `var' treat base0 c.base0#i.above if months_bin==6 & base0>=600 & base0<=1200, vce(cluster client0)
		reg_loop_store `row' "Two-Slope" 6 treat
		local ++row
		
		*Table estimates: Right tail
		reghdfe `var' treat base0 if months_bin==6 & ((base0>=913 & base0<=1200) | treat==1), vce(cluster client0)
		reg_loop_store `row' "Right-Tail" 6 treat
		local ++row
		
		*Difference in discontinuity: period 6 compared with period -3
		gen treat_month=months_bin==6
		gen dd=treat_month*treat
		
		reghdfe `var' dd treat treat_month base0 c.base0#treat_month if base0>=600 & base0<=1200 & (months_bin==6 | months_bin==-3) , vce(cluster client0)
		reg_loop_store `row' "One-Slope-Diff" 6 dd
		local ++row
			
		reghdfe `var' dd treat treat_month base0 c.base0#i.above c.base0#treat_month c.base0#treat_month#i.above if base0>=600 & base0<=1200 & (months_bin==6 | months_bin==-3) , vce(cluster client0)
		reg_loop_store `row' "Two-Slope-Diff" 6 dd
		local ++row
		
		reghdfe `var' dd treat treat_month base0 c.base0#treat_month if ((base0>=913 & base0<=1200) | treat==1) & (months_bin==6 | months_bin==-3) , vce(cluster client0)
		reg_loop_store `row' "Right-Slope-Diff" 6 dd
		local ++row			
			
		*Keep only the rows that hold estimates
		keep if beta!=.
		keep spec event_time beta ub lb n
		
		*Save estimates
		save "$output/`name'_estimates.dta", replace
		
		restore
	end 
	
	*Figure C2: estimates by hiring period for each previous-job outcome
	local outcomes base log_base diff_base months_diff observe
	foreach var of local outcomes {
		reg_loop `var' figc2_`var'_rd_time
	}

*********************************************************
*Repeat for future outcomes post hire
*********************************************************
*Pay observed 18 months after hire, at the hiring client
	use "$output\new_hires_panel_wide", clear
	gen months_diff=mofd(yr_month) - mofd(month_hired)
	keep if months_diff==18
	keep if client_id==client0
	*One row per hire event; any remaining duplicates are dropped arbitrarily
	duplicates drop emp_pur_c client0 state0 month_hired, force
	
	*Future base pay, its log, and growth relative to base pay at hire
	rename base base_future
	gen wage_growth=(base_future-base0)/base0 
	gen log_base_future=log(base_future)
	
	*Separation: hire is in hires_temp_wide but has no row 18 months later at the same client
	merge 1:1 emp_pur_c client0 state0 month_hired using "$temp/hires_temp_wide"
	gen separation=_merge==2
	drop _merge
		
*Regression variables
	*Base pay at hire in bins of width 10, aligned so that one bin starts at 913
	gen bin=913+10*floor((base0-913)/10)
	*Treated range of base pay at hire: 913 to 953 inclusive
	gen treat=inrange(base0,913,953)
	*Indicator for bins at or above 913
	gen above=!(bin<913)
	
	*Hiring month relative to May 2016, grouped into 3-month bins
	gen temp_months = mofd(month_hired)-ym(2016,5)
	gen months_bin=3*floor(temp_months/3)
	
	
*Bin-level estimates for future outcomes by hiring period
*  pre  = hired on or before 1 April 2016
*  post = hired from 1 May 2016 through 1 April 2017
*  All pre-period outcomes are run first, then all post-period outcomes.
	local future base_future log_base_future wage_growth separation
	foreach period in pre post {
		foreach var of local future {
			preserve 
			if "`period'"=="pre" {
				keep if month_hired<= mdy(04,1,2016)
			}
			else {
				keep if month_hired>= mdy(05,1,2016) & month_hired<= mdy(04,1,2017)
			}
			*Restrict to bins from 513 to 1313
			keep if inrange(bin,513,1313)
			reg_est `var' figc1_raw_`var'_rd_`period' 16
			restore 
		}
	}
	
*Estimates by hiring period for each future outcome
	foreach var of local future {
		reg_loop `var' figc2_`var'_rd_time
	}
	
log close
